In [1]:
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, merge, Input
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D
from keras.utils import np_utils
from keras.models import model_from_json
from keras import backend as K
from keras.preprocessing import image
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
import random
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
(The two steps above were completed in the notebook `Preprocessing train dataset.ipynb`.)
In [2]:
from keras.preprocessing.image import ImageDataGenerator

# Every image is resized to 224x224 when it is read from disk.
image_width = 224
image_height = 224
image_size = (image_width, image_height)

# Settings shared by the training and validation streams: same target size,
# batches of 16 images, and a single 0/1 label per image.
flow_options = dict(target_size=image_size, batch_size=16, class_mode='binary')

# Training images are read from 'mytrain_ox'; pixel values are multiplied by 1/255.
train_datagen = ImageDataGenerator(rescale=1.0/255)
train_generator = train_datagen.flow_from_directory('mytrain_ox', **flow_options)

# Validation images are read from 'myvalid_ox' with the same rescaling.
validation_datagen = ImageDataGenerator(rescale=1.0/255)
validation_generator = validation_datagen.flow_from_directory('myvalid_ox', **flow_options)
In [3]:
# Preview one training batch on a 3x6 grid, titled with the class of each image.
x, y = train_generator.next()
plt.figure(figsize=(16, 8))
for i, img in enumerate(x):
    label = y[i]
    plt.subplot(3, 6, i+1)
    # Label 1 is shown as 'dog', anything else as 'cat'.
    plt.title('dog' if label == 1 else 'cat')
    plt.axis('off')
    plt.imshow(img, interpolation="nearest")
In [4]:
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    The main path is a 1x1 convolution, a kernel_size x kernel_size
    convolution with 'same' border mode, and another 1x1 convolution, each
    followed by batch normalization. A ReLU follows the first two stages.
    The main path is then summed with ``input_tensor`` and passed through a
    final ReLU.

    Args:
        input_tensor: tensor fed to both the main path and the shortcut.
        kernel_size: side length of the square kernel of the middle convolution.
        filters: sequence of three filter counts, one per convolution.
        stage: stage number, used to build the layer names.
        block: block label ('a', 'b', ...), used to build the layer names.

    Returns:
        The output tensor of the final ReLU activation.
    """
    first_filters, middle_filters, last_filters = filters
    # Normalize over axis 3 under 'tf' dimension ordering, axis 1 otherwise.
    channel_axis = 3 if K.image_dim_ordering() == 'tf' else 1
    branch = str(stage) + block + '_branch'
    conv_prefix = 'res' + branch
    bn_prefix = 'bn' + branch

    def conv_bn(tensor, nb_filter, size, suffix, **conv_options):
        # One convolution immediately followed by its batch normalization.
        tensor = Convolution2D(nb_filter, size, size,
                               name=conv_prefix + suffix, **conv_options)(tensor)
        return BatchNormalization(axis=channel_axis, name=bn_prefix + suffix)(tensor)

    main_path = conv_bn(input_tensor, first_filters, 1, '2a')
    main_path = Activation('relu')(main_path)

    main_path = conv_bn(main_path, middle_filters, kernel_size, '2b', border_mode='same')
    main_path = Activation('relu')(main_path)

    main_path = conv_bn(main_path, last_filters, 1, '2c')

    # Residual connection: add the untouched input back onto the main path.
    summed = merge([main_path, input_tensor], mode='sum')
    return Activation('relu')(summed)
block: 'a', 'b', ... — the current block label, used for generating layer names.
Note that from stage 3 onward, the first conv layer on the main path uses subsample=(2,2), and the shortcut must use subsample=(2,2) as well.
In [5]:
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Residual block whose shortcut is a strided 1x1 convolution.

    The main path is a 1x1 convolution applied with ``strides``, a
    kernel_size x kernel_size convolution with 'same' border mode, and
    another 1x1 convolution, each followed by batch normalization. A ReLU
    follows the first two stages. The shortcut applies its own 1x1
    convolution (same ``strides``) and batch normalization to
    ``input_tensor``. The two paths are summed and passed through a final
    ReLU.

    Args:
        input_tensor: tensor fed to both the main path and the shortcut.
        kernel_size: side length of the square kernel of the middle convolution.
        filters: sequence of three filter counts; the last one is also used
            for the shortcut convolution.
        stage: stage number, used to build the layer names.
        block: block label ('a', 'b', ...), used to build the layer names.
        strides: subsample factor of the first main-path convolution and of
            the shortcut convolution.

    Returns:
        The output tensor of the final ReLU activation.
    """
    first_filters, middle_filters, last_filters = filters
    # Normalize over axis 3 under 'tf' dimension ordering, axis 1 otherwise.
    channel_axis = 3 if K.image_dim_ordering() == 'tf' else 1
    branch = str(stage) + block + '_branch'
    conv_prefix = 'res' + branch
    bn_prefix = 'bn' + branch

    def conv_bn(tensor, nb_filter, size, suffix, **conv_options):
        # One convolution immediately followed by its batch normalization.
        tensor = Convolution2D(nb_filter, size, size,
                               name=conv_prefix + suffix, **conv_options)(tensor)
        return BatchNormalization(axis=channel_axis, name=bn_prefix + suffix)(tensor)

    main_path = conv_bn(input_tensor, first_filters, 1, '2a', subsample=strides)
    main_path = Activation('relu')(main_path)

    main_path = conv_bn(main_path, middle_filters, kernel_size, '2b', border_mode='same')
    main_path = Activation('relu')(main_path)

    main_path = conv_bn(main_path, last_filters, 1, '2c')

    # The shortcut is built after the main path, from the block's original input.
    shortcut = conv_bn(input_tensor, last_filters, 1, '1', subsample=strides)

    summed = merge([main_path, shortcut], mode='sum')
    return Activation('relu')(summed)
In [6]:
# ResNet50 convolutional base (no classification head).
img_input = Input(shape=(image_width, image_height, 3))

# Stem: zero-pad, 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pool.
x = ZeroPadding2D((3, 3))(img_input)
x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
x = BatchNormalization(axis=3, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

# Each stage is one conv_block ('a') followed by identity blocks.
# Columns: stage number, filter counts, identity-block labels, conv_block strides.
# Stage 2 keeps the resolution (strides 1x1); later stages use conv_block's
# default strides of (2, 2).
stage_specs = [
    (2, [64, 64, 256], 'bc', (1, 1)),
    (3, [128, 128, 512], 'bcd', (2, 2)),
    (4, [256, 256, 1024], 'bcdef', (2, 2)),
    (5, [512, 512, 2048], 'bc', (2, 2)),
]
for stage, stage_filters, identity_labels, stage_strides in stage_specs:
    x = conv_block(x, 3, stage_filters, stage=stage, block='a', strides=stage_strides)
    for block_label in identity_labels:
        x = identity_block(x, 3, stage_filters, stage=stage, block=block_label)

base_model = Model(img_input, x)
In [7]:
# Fetch the ResNet50 "no top" weight file (get_file is given the file name,
# the URL, a cache sub-directory and an MD5 hash) and load it into the base.
TF_WEIGHTS_PATH_NO_TOP = (
    'https://github.com/fchollet/deep-learning-models/releases/download/'
    'v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
)
weights_path = get_file(
    'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
    TF_WEIGHTS_PATH_NO_TOP,
    cache_subdir='models',
    md5_hash='a268eb855778b3df3c7506639542a6af'
)
base_model.load_weights(weights_path)
In [8]:
# Classification head on top of the base model:
# 7x7 average pooling -> flatten -> dropout (0.5) -> one sigmoid unit.
head = AveragePooling2D((7, 7), name='avg_pool')(base_model.output)
head = Flatten()(head)
head = Dropout(0.5)(head)
x = Dense(1, activation='sigmoid', name='output')(head)

model = Model(input=base_model.input, output=x)
In [9]:
# Only the last `top_num` layers of the model are trainable; every layer
# before them is frozen.
top_num = 4
first_trainable = len(model.layers) - top_num
for position, layer in enumerate(model.layers):
    layer.trainable = position >= first_trainable
In [10]:
# Configure training: binary cross-entropy loss, Adadelta optimizer, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'])
In [11]:
from keras.callbacks import ModelCheckpoint, TensorBoard

# Checkpoint written to "resnet_best.h5"; it monitors 'val_acc' with
# save_best_only enabled.
best_model = ModelCheckpoint(
    "resnet_best.h5",
    monitor='val_acc',
    verbose=0,
    save_best_only=True)
In [12]:
# Callbacks used during training: the checkpoint defined earlier plus
# TensorBoard logging into ./logs.
training_callbacks = [best_model, TensorBoard(log_dir='./logs', histogram_freq=1)]

# Train from the directory generators and validate against the validation generator.
model.fit_generator(
    train_generator,
    samples_per_epoch=2048,
    nb_epoch=40,
    validation_data=validation_generator,
    nb_val_samples=1024,
    callbacks=training_callbacks)
Out[12]:
In [13]:
# Write the model architecture (as JSON) to 'resnet.json'.
with open('resnet.json', 'w') as json_file:
    json_file.write(model.to_json())
In [14]:
# Rebuild the model from the saved JSON architecture, then load the
# checkpointed weights from 'resnet_best.h5'.
with open('resnet.json', 'r') as json_file:
    architecture_json = json_file.read()
model = model_from_json(architecture_json)
model.load_weights('resnet_best.h5')
In [21]:
# Show one validation batch with the predicted class and its confidence.
x, y = validation_generator.next()
# Run the whole batch through the network once instead of once per image.
predictions = model.predict(x)
plt.figure(figsize=(16, 8))
# Loop over the images actually returned rather than a fixed 16, so a batch
# that comes back short (e.g. a final partial batch) cannot index past the
# end of x.
for i in range(len(x)):
    # Plain float, so the '%' formatting below does not depend on how a
    # one-element array is formatted.
    prediction = float(predictions[i][0])
    plt.subplot(3, 6, i+1)
    # Outputs below 0.5 are reported as 'cat', the rest as 'dog'.
    if prediction < 0.5:
        plt.title('cat %.2f%%' % (100 - prediction*100))
    else:
        plt.title('dog %.2f%%' % (prediction*100))
    plt.axis('off')
    plt.imshow(x[i])
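The output of the base model has shape (7, 7, 2048).
The weights of the fully connected layer have shape (2048, 1).
To draw the heatmap, I computed the Class Activation Map (CAM) from the output of the base model and then used OpenCV to visualize the result.
$\mathrm{cam} = (P - 0.5) \cdot \mathrm{output} \cdot w$, where $P$ is the predicted probability, $\mathrm{output}$ is the base-model output and $w$ is the weight matrix of the fully connected layer.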
In [22]:
# Look-up table from layer name to layer object.
layer_dict = {layer.name: layer for layer in model.layers}
# Kernel of the final dense layer (get_weights()[0]).
weights = model.layers[-1].get_weights()[0]
# Second model that returns both the feature map used for the heatmap and the
# prediction. The feature map is taken as the input of 'avg_pool', i.e. the
# exact tensor the classifier head pools. The previous hard-coded 'merge_16'
# was an auto-generated name (it changes with the number of merge layers
# created in the session) and referred to the sum *before* the block's final
# ReLU, which is not what the head sees.
model2 = Model(input=model.input, output=[layer_dict['avg_pool'].input, model.output])
In [28]:
# Overlay a class-activation heatmap on each image of one validation batch.
x, y = validation_generator.next()
# Run the whole batch through the network once instead of once per image.
feature_maps, predictions = model2.predict(x)
plt.figure(figsize=(16, 8))
# Loop over the images actually returned rather than a fixed 16, so a batch
# that comes back short (e.g. a final partial batch) cannot index past the
# end of x.
for i in range(len(x)):
    # Images were rescaled by 1/255 when loaded; go back to 0-255 bytes for OpenCV.
    img = (x[i]*255).astype(np.uint8)
    prediction = float(predictions[i][0])
    base_model_outputs = feature_maps[i]

    plt.subplot(3, 6, i+1)
    if prediction < 0.5:
        plt.title('cat %.2f%%' % (100 - prediction*100))
    else:
        plt.title('dog %.2f%%' % (prediction*100))

    # Weight each feature channel by the dense-layer weights. The factor
    # (prediction - 0.5) flips the sign for outputs below 0.5, so the map
    # highlights evidence for whichever class was predicted.
    cam = (prediction - 0.5) * np.matmul(base_model_outputs, weights)
    # Drop the trailing singleton axis so the map is explicitly 2-D
    # (assumes weights has a single output column, as built in this notebook).
    cam = cam[:, :, 0]

    # Normalize to [0, 1]; skip the division for a flat map to avoid 0/0.
    cam -= cam.min()
    peak = cam.max()
    if peak > 0:
        cam /= peak
    # Shift and rescale so that values below 0.2 become negative.
    cam -= 0.2
    cam /= 0.8

    # Resize to the image size (cv2 expects (width, height)).
    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    # Clip before the uint8 cast: negative floats would otherwise wrap around.
    cam = np.clip(cam, 0, 1)

    heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
    # Blank out weakly activated pixels so only strong regions are coloured.
    heatmap[np.where(cam <= 0.2)] = 0

    # applyColorMap returns BGR; reverse the channel axis to RGB for matplotlib.
    out = cv2.addWeighted(img, 0.8, heatmap[:,:,::-1], 0.4, 0)

    plt.axis('off')
    plt.imshow(out)
In [ ]: